import os

# Two-pass filter over the session log files:
#   pass 1 collects the ids of cookies that issued at least one query
#          containing a keyword from the dictionary file;
#   pass 2 copies every record belonging to one of those cookies to the
#          output file.

# Directory containing the session log files to scan.
SESSION_DIR = '/home/cluo/publicdata/session/2012010102'
# One keyword per line; a cookie is selected when any keyword occurs as a
# substring of one of its queries.
KEYWORD_DICT = '../ref/6Diseases.dict'
# Destination for every record of every selected cookie.
OUTPUT_PATH = '../data/allpossiblesessions.dat'

# A record needs at least this many tab-separated fields to be considered.
MIN_FIELDS = 5
# The cookie id is the first COOKIE_ID_LEN characters of the first field.
COOKIE_ID_LEN = 32
# Index of the tab-separated field holding the query text.
QUERY_FIELD = 4


def _iter_records(name):
    """Yield ``(cookieid, segs, line)`` for each usable line of one file.

    ``name`` is a file name relative to ``SESSION_DIR``.  Lines with fewer
    than ``MIN_FIELDS`` tab-separated fields are skipped.  ``line`` is the
    raw, unstripped line so callers can write it back unchanged.  The file
    is closed once the generator is exhausted.
    """
    with open(os.path.join(SESSION_DIR, name)) as fin:
        for line in fin:
            segs = line.strip().split('\t')
            if len(segs) >= MIN_FIELDS:
                yield segs[0][:COOKIE_ID_LEN], segs, line


files = os.listdir(SESSION_DIR)

healthywords = set()
with open(KEYWORD_DICT) as dict_file:
    for l in dict_file:
        word = l.strip()
        # A blank line would add '' to the set, and '' is a substring of
        # every query, which would select every cookie.
        if word:
            healthywords.add(word)

# Not used in this part of the script; kept in case later code reads it.
count = 0

# Pass 1: find the cookies with at least one keyword-bearing query.
posscookies = set()
for f in files:
    # Parenthesised single-argument form prints the same text under both
    # the Python 2 print statement and the Python 3 print function.
    print(f)
    for cookieid, segs, _ in _iter_records(f):
        if cookieid in posscookies:
            # Already selected; no need to scan the keywords again.
            continue
        query = segs[QUERY_FIELD]
        if any(w in query for w in healthywords):
            posscookies.add(cookieid)

# Pass 2: dump every record that belongs to a selected cookie.
with open(OUTPUT_PATH, 'w') as fout:
    for f in files:
        print(f)
        for cookieid, _, line in _iter_records(f):
            if cookieid in posscookies:
                fout.write(line)
